library(TDA)
library(tdaTS) #our package
library(tibble)
library(plotly) #3D plotting
library(fields) #image.plot function
library(evd) #gumbelWe already explored the shape of the persistence diagram and of the barcodes. A recent paper proposed a universal distribution of noise for the radio death/birth. We simulate different types of data to reveal the Gumbel shape of the this distribution and hope to be able to discriminate between this shape and the true signal.
myplots <- function(data, complex = "alpha", breaks = 100, normalization = 1/2)
{
if(complex == "alpha")
{
DiagAlphaCmplx <- alphaComplexDiag(X = data,
library = c("GUDHI", "Dionysus"),
location = TRUE)
}
if(complex == "rips")
{
DiagAlphaCmplx <- ripsDiag(X = data,
maxdimension = 1,
maxscale = 0.5,
dist = "euclidean",
library = c("GUDHI", "Dionysus"),
location = TRUE)
}
opar <- par(no.readonly = TRUE)
par(mfrow = c(1,2), mar=c(1,2,1,0), mgp=c(1.5,0.5,0))
plot(DiagAlphaCmplx$diagram, barcode = TRUE)
plot(data, col = 1,xaxt="n", yaxt="n",xlab="", ylab="", asp = 1)
one <- which(DiagAlphaCmplx[["diagram"]][, 1] == 1)
for (i in seq(along = one))
{
for (j in seq_len(dim(DiagAlphaCmplx[["cycleLocation"]][[one[i]]])[1])){lines(DiagAlphaCmplx[["cycleLocation"]][[one[i]]][j, , ] + rnorm(n = 4,sd = 0.0),cex = 1, col = i + 1)}
}
par(opar)
diagALPHA <- DiagAlphaCmplx$diagram
sel <- diagALPHA[,1] == 1
res <- diagALPHA[sel,3]/diagALPHA[sel,2]
gumbel <- normalization*(log(log(res)) - mean(log(log(res)))) + digamma(1)
hist(gumbel, breaks = breaks, probability = TRUE,xlim = c(min(gumbel), max(gumbel)+0.5))
x2 <- seq(min(gumbel), max(gumbel)+0.5, length = 1000)
fun <- exp(x2 - exp(x2))
lines(x2, fun, col = 2, lwd = 2)
return(list(diagram = DiagAlphaCmplx$diagram, maxValue = max(gumbel)))
}data <- data2D_pointSquareHole(5000, 0.075)
pl <- myplots(data); pl$maxValue## [1] 1.65057
data <- data2D_pointSquareHole(5000, 0.15)
pl <- myplots(data); pl$maxValue## [1] 1.817264
data <- data2D_pointSquareHole(5000, 0.3)
pl <- myplots(data); pl$maxValue## [1] 1.979486
data <- data2D_pointSquareHole(50, 0.3)
pl <- myplots(data); pl$maxValue## [1] 1.19714
data <- data2D_pointSquareHole(500, 0.3)
pl <- myplots(data); pl$maxValue## [1] 1.655248
data <- data2D_pointSquareHole(5000, 0.3)
pl <- myplots(data); pl$maxValue## [1] 1.935782
data <- data2D_pointSquareHole(100, 0)
pl <- myplots(data); pl$maxValue## [1] 1.07222
data <- data2D_pointSquareHole(300, 0)
pl <- myplots(data); pl$maxValue## [1] 1.247017
data <- data2D_pointSquareHole(1000, 0)
pl <- myplots(data); pl$maxValue## [1] 1.457478
data<- data2D_pointEllipseMissingArc(100, 0)
pl <- myplots(data); pl$maxValue## [1] 2.137156
data <- data2D_pointEllipseMissingArc(100, 0.1)
pl <- myplots(data); pl$maxValue## [1] 1.131344
data <- data2D_pointEllipseMissingArc(100, 0.2)
pl <- myplots(data); pl$maxValue## [1] 1.227235
data <- data2D_pointEllipseMissingArc(100, 0.3)
pl <- myplots(data); pl$maxValue## [1] 0.6183207
Gaussian versus Uniform noise 500 data points
data<- data2D_pointSquareHole(500, 0)
pl <- myplots(data); pl$maxValue## [1] 1.296193
data<- data.frame(matrix(0, nrow = 500, ncol = 2))
colnames(data) <- c("x", "y")
data$x <- data$x + rnorm(n= 500, mean = 0, sd = 10)
data$y <- data$y + rnorm(n = 500, mean = 0, sd = 10)
pl <- myplots(data); pl$maxValue## [1] 1.328406
Gaussian versus Uniform noise 10000 data points
data<- data2D_pointSquareHole(10000, 0)
pl <- myplots(data); pl$maxValue## [1] 1.499193
data<- data.frame(matrix(0, nrow = 10000, ncol = 2))
colnames(data) <- c("x", "y")
data$x <- data$x + rnorm(n= 10000, mean = 0, sd = 10)
data$y <- data$y + rnorm(n = 10000, mean = 0, sd = 10)
pl <- myplots(data); pl$maxValue## [1] 1.476651
The Square with a hole?
pl <- myplots(data); pl$maxValue## [1] 1.476651
The circle closed + small noise
data <- data2D_pointEllipseMissingArc(10000, 0)
pl <- myplots(data); pl$maxValue## [1] 2.132478
data <- data2D_pointSquareHole(30000, 0)
DiagAlphaCmplx <- alphaComplexDiag(X = data,
library = c("GUDHI"),
location = TRUE)
diagALPHA <- DiagAlphaCmplx$diagram
dim(diagALPHA)## [1] 59471 3
sel <- diagALPHA[,1] == 1
sum(sel)## [1] 29471
res <- diagALPHA[sel,3]/diagALPHA[sel,2]
normalization <- 0.5
gumbel <- normalization*(log(log(res)) - mean(log(log(res)))) + digamma(1)
max(gumbel)## [1] 1.539083
hist(gumbel, breaks = 200, probability = TRUE,xlim = c(min(gumbel), max(gumbel)+1))
x2 <- seq(min(gumbel), max(gumbel)+1, length = 1000)
fun <- exp(x2 - exp(x2))
lines(x2, fun, col = 2, lwd = 2)library(TDAstats)
pvalue <- rep(0,100)
gap <- 0.3
nb <- 300
for(i in 1:100)
{
data <- data2D_pointSquareHole(nb, gap)
diagALPHA <- calculate_homology(data, dim = 1)
sel <- diagALPHA[,1] == 1
res <- diagALPHA[sel,3]/diagALPHA[sel,2]
gumbel <- log(log(res)) - mean(log(log(res))) + digamma(1)
pvalue[i] <- exp(-exp(max(gumbel)))
}
hist(pvalue, breaks = 20, xlim=c(0,1))
abline(v = 0.05)sum(pvalue < 0.005) #how many time we "see" the hole## [1] 94
library(TDAstats)
pvalue <- rep(0,100)
gap <- 0
nb <- 300
for(i in 1:100)
{
data <- data2D_pointSquareHole(nb, gap)
diagALPHA <- calculate_homology(data, dim = 1)
sel <- diagALPHA[,1] == 1
res <- diagALPHA[sel,3]/diagALPHA[sel,2]
gumbel <- log(log(res)) - mean(log(log(res))) + digamma(1)
pvalue[i] <- exp(-exp(max(gumbel)))
}
hist(pvalue, breaks = 20, xlim=c(0,1))
abline(v = 0.05)sum(pvalue < 0.005) #how many time we "see" the hole## [1] 5
pvalues <- rep(0,600)
index <- (floor(0:599/100)+1)
for(i in 1:600)
{
j <- index[i]
data <- data2D_pointEllipseMissingArc(j*100, 0.1, 0.05)
#plot(data)
diagALPHA <- calculate_homology(data, dim = 1)
sel <- diagALPHA[,1] == 1
res <- diagALPHA[sel,3]/diagALPHA[sel,2]
gumbel <- log(log(res)) - mean(log(log(res))) + digamma(1)
pvalues[i] <- exp(-exp(max(gumbel)))
}
plot(pvalues, type = 'l')plot(log(pvalues), type = 'l')